// NOTE: Create the related files yourself before running this program.

package com.hadoop.hdfs;

import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.io.IOUtils; /**

 * Merges small local files into larger files on HDFS.

*/ public class MergeSmallFilesToHDFS { private static FileSystem fs = null; private static FileSystem local = null;

/**
 * Program entry point; delegates all work to {@link #list()}.
 *
 * @param args command-line arguments (not read)
 * @throws IOException propagated from {@link #list()}
 * @throws URISyntaxException propagated from {@link #list()}
 */
public static void main(String[] args) throws IOException, URISyntaxException {
	MergeSmallFilesToHDFS.list();
}

/**
 * Merges the local data set and uploads the result to HDFS.
 *
 * <p>Every entry directly under the local root whose path does not end in
 * {@code svn} is processed (the entries are expected to be date directories
 * such as {@code 2019-10-31}). All files inside such a directory whose path
 * ends in {@code txt} are concatenated, in the order returned by the glob,
 * into one HDFS file named after the directory with the dashes removed.
 *
 * @throws IOException if a file system cannot be obtained or a read/write fails
 * @throws URISyntaxException if the hard-coded HDFS address is not a valid URI
 */
public static void list() throws IOException, URISyntaxException {
	// Load the Hadoop file system configuration.
	Configuration conf = new Configuration();
	// File system access point. NOTE: change hdfs://master:9000 to your own HDFS address.
	URI uri = new URI("hdfs://master:9000");
	// Create the HDFS FileSystem object.
	fs = FileSystem.get(uri, conf);
	// Obtain the local file system.
	local = FileSystem.getLocal(conf);
	// Filter out svn entries in the directory. NOTE: change E://Hadoop/73/ to your own path.
	FileStatus[] dirStatus = local.globStatus(new Path("E://Hadoop/73/*"), new RegexExcludePathFilter("^.*svn$"));
	if (dirStatus == null) {
		// Defensive: globStatus is documented to return null in some no-match cases.
		return;
	}
	// Paths of all entries under the 73 directory.
	Path[] dirs = FileUtil.stat2Paths(dirStatus);
	for (Path dir : dirs) {
		// e.g. 2019-10-31 -> 20191031; used as the merged file's base name.
		String fileName = dir.getName().replace("-", "");
		// Only accept the txt files inside the date directory.
		FileStatus[] localStatus = local.globStatus(new Path(dir + "/*"), new RegexAcceptPathFilter("^.*txt$"));
		// Treat a null result like "no files" so the output file is still created, as before.
		Path[] listedPaths = (localStatus == null) ? new Path[0] : FileUtil.stat2Paths(localStatus);
		// Output path. NOTE: change hdfs://master:9000/20191031/ to your own HDFS directory.
		Path block = new Path("hdfs://master:9000/20191031/" + fileName + ".txt");
		System.out.println("合并后的文件名称:"+fileName+".txt");
		// try-with-resources guarantees both streams are closed even when a copy fails.
		try (FSDataOutputStream out = fs.create(block)) {
			for (Path p : listedPaths) {
				try (FSDataInputStream in = local.open(p)) {
					// 'false': copyBytes must not close the streams; the output is shared
					// across all inputs and the try blocks own the closing.
					IOUtils.copyBytes(in, out, 4096, false);
				}
			}
		}
	}
}

/**
 * 
 *  过滤 regex 格式的文件
 *
 */
public static class RegexExcludePathFilter implements PathFilter {
	private final String regex;
	public RegexExcludePathFilter(String regex) {
		this.regex = regex;
	}

	
	public boolean accept(Path path) {
		boolean flag = path.toString().matches(regex);
		return !flag;
	}

}

/**
 * Path filter that accepts only paths whose full string form matches a
 * regular expression.
 */
public static class RegexAcceptPathFilter implements PathFilter {
	// Compiled once in the constructor so accept() does not recompile the
	// expression for every candidate path (String.matches would).
	private final java.util.regex.Pattern pattern;

	/**
	 * @param regex regular expression that must match the entire path string
	 *              for the path to be accepted
	 * @throws java.util.regex.PatternSyntaxException if {@code regex} is invalid
	 */
	public RegexAcceptPathFilter(String regex) {
		this.pattern = java.util.regex.Pattern.compile(regex);
	}

	/**
	 * @param path candidate path
	 * @return {@code true} when the whole path string matches the expression,
	 *         {@code false} otherwise
	 */
	@Override
	public boolean accept(Path path) {
		return pattern.matcher(path.toString()).matches();
	}

}

}